From 179b35aad75d11bb9168f1b69785ff74374b6855 Mon Sep 17 00:00:00 2001 From: "iap10@labyrinth.cl.cam.ac.uk" Date: Fri, 7 Jan 2005 01:41:05 +0000 Subject: [PATCH] bitkeeper revision 1.1159.212.10 (41dde8b1uPUodrtnTKejSUrLg-fcoQ) xend extensions for VMX guests --- .rootkeys | 6 + tools/examples/Makefile | 3 + tools/examples/bochsrc | 19 + tools/examples/mem-map.sxp | 10 + tools/examples/xmexample.vmx | 93 +++ tools/libxc/linux_boot_params.h | 163 +++++ tools/libxc/xc.h | 14 + tools/libxc/xc_vmx_build.c | 865 ++++++++++++++++++++++++ tools/python/xen/lowlevel/xc/xc.c | 93 +++ tools/python/xen/util/memmap.py | 41 ++ tools/python/xen/xend/XendDomainInfo.py | 70 +- tools/python/xen/xm/create.py | 22 + 12 files changed, 1389 insertions(+), 10 deletions(-) create mode 100644 tools/examples/bochsrc create mode 100644 tools/examples/mem-map.sxp create mode 100644 tools/examples/xmexample.vmx create mode 100644 tools/libxc/linux_boot_params.h create mode 100644 tools/libxc/xc_vmx_build.c create mode 100644 tools/python/xen/util/memmap.py diff --git a/.rootkeys b/.rootkeys index 0b4b69a4d7..ca97aacf12 100644 --- a/.rootkeys +++ b/.rootkeys @@ -320,14 +320,18 @@ 401d7e16UgeqroJQTIhwkrDVkoWgZQ tools/examples/README 41597996VhTbNuHbuscYSfRb-WR6fA tools/examples/block-enbd 41597996GHP2_yVih2UspXh328fgMQ tools/examples/block-file +41dde8af16Hulg1pgW8aOnbbxyrl7w tools/examples/bochsrc 405ff55dawQyCHFEnJ067ChPRoXBBA tools/examples/init.d/xend 40278d94cIUWl2eRgnwZtr4hTyWT1Q tools/examples/init.d/xendomains +41dde8afTUuvdtFUlOx0ZRusKxyd8w tools/examples/mem-map.sxp 40ee75a9xFz6S05sDKu-JCLqyVTkDA tools/examples/network 40ee75a967sxgcRY4Q7zXoVUaJ4flA tools/examples/vif-bridge 40ee75a93cqxHp6MiYXxxwR5j2_8QQ tools/examples/xend-config.sxp +41dde8af6M2Pm1Rrv_f5jEFC_BIOIA tools/examples/xmexample.vmx 41090ec8Pj_bkgCBpg2W7WfmNkumEA tools/examples/xmexample1 40cf2937oKlROYOJTN8GWwWM5AmjBg tools/examples/xmexample2 3fbba6dbDfYvJSsw9500b4SZyUhxjQ tools/libxc/Makefile 
+41dde8afKYRKxS4XtLv1KUegGQy_bg tools/libxc/linux_boot_params.h 41cc934abX-QLXJXW_clV_wRjM0zYg tools/libxc/plan9a.out.h 3fbba6dc1uU7U3IFeF6A-XEOYF2MkQ tools/libxc/rpm.spec 3fbba6dcrNxtygEcgJYAJJ1gCQqfsA tools/libxc/xc.h @@ -347,6 +351,7 @@ 3fbba6dctWRWlFJkYb6hdix2X4WMuw tools/libxc/xc_private.c 3fbba6dcbVrG2hPzEzwdeV_UC8kydQ tools/libxc/xc_private.h 40589968UQFnJeOMn8UIFLbXBuwXjw tools/libxc/xc_rrobin.c +41dde8b0pLfAKMs_L9Uri2hnzHiCRQ tools/libxc/xc_vmx_build.c 40e1b09dMYB4ItGCqcMIzirdMd9I-w tools/libxutil/Makefile 40e033325Sjqs-_4TuzeUEprP_gYFg tools/libxutil/allocate.c 40e03332KYz7o1bn2MG_KPbBlyoIMA tools/libxutil/allocate.h @@ -473,6 +478,7 @@ 40dfd40aGqGkiopOOgJxSF4iCbHM0Q tools/python/xen/util/__init__.py 4055ee4dwy4l0MghZosxoiu6zmhc9Q tools/python/xen/util/console_client.py 40c9c468IienauFHQ_xJIcqnPJ8giQ tools/python/xen/util/ip.py +41dde8b0yuJX-S79w4xJKxBQ-Mhp1A tools/python/xen/util/memmap.py 4059c6a0pnxhG8hwSOivXybbGOwuXw tools/python/xen/util/tempfile.py 40c9c468SNuObE_YWARyS0hzTPSzKg tools/python/xen/xend/Args.py 41597996WNvJA-DVCBmc0xU9w_XmoA tools/python/xen/xend/Blkctl.py diff --git a/tools/examples/Makefile b/tools/examples/Makefile index 1e77f89d50..f0f8250ea0 100644 --- a/tools/examples/Makefile +++ b/tools/examples/Makefile @@ -8,6 +8,9 @@ XEN_CONFIG_DIR = /etc/xen XEN_CONFIGS = xend-config.sxp XEN_CONFIGS += xmexample1 XEN_CONFIGS += xmexample2 +XEN_CONFIGS += xmexample.vmx +XEN_CONFIGS += mem-map.sxp +XEN_CONFIGS += bochsrc # Xen script dir and scripts to go there. 
XEN_SCRIPT_DIR = /etc/xen/scripts diff --git a/tools/examples/bochsrc b/tools/examples/bochsrc new file mode 100644 index 0000000000..907c78bae2 --- /dev/null +++ b/tools/examples/bochsrc @@ -0,0 +1,19 @@ +#megs: 32 +#romimage: file=$BXSHARE/BIOS-bochs-latest, address=0xf0000 +#vgaromimage: $BXSHARE/VGABIOS-lgpl-latest +floppya: 1_44=a.img, status=inserted +floppyb: 1_44=b.img, status=inserted +#ata0-master: type=disk, path=minibootable.img, cylinders=900, heads=15, spt=17 +# if you don't use absolute paths below, bochs looks under the cwd of xend, +# which is usually "/" +ata0-master: type=disk, path=/tmp/min-fc2-i386.img, cylinders=800, heads=4, spt=32 +boot: c + +log: /tmp/bochsout.txt +#debug: action=report +info: action=report +error: action=report +panic: action=ask + +mouse: enabled=0 +ips: 1500000 diff --git a/tools/examples/mem-map.sxp b/tools/examples/mem-map.sxp new file mode 100644 index 0000000000..246b49b92a --- /dev/null +++ b/tools/examples/mem-map.sxp @@ -0,0 +1,10 @@ +(memmap + (0000000000000000 000000000009f800 "AddressRangeMemory" WB) + (000000000009f800 00000000000a0000 "AddressRangeReserved" UC) + (00000000000a0000 00000000000bffff "AddressRangeIO" UC) + (00000000000f0000 0000000000100000 "AddressRangeReserved" UC) + (0000000000100000 0000000008000000 "AddressRangeMemory" WB) + (0000000007fff000 0000000008000000 "AddressRangeShared" WB) + (0000000008000000 0000000008003000 "AddressRangeNVS" UC) + (0000000008003000 000000000800d000 "AddressRangeACPI" WB) + (00000000fec00000 0000000100000000 "AddressRangeIO" UC)) diff --git a/tools/examples/xmexample.vmx b/tools/examples/xmexample.vmx new file mode 100644 index 0000000000..6e9039584b --- /dev/null +++ b/tools/examples/xmexample.vmx @@ -0,0 +1,93 @@ +# -*- mode: python; -*- +#============================================================================ +# Python configuration setup for 'xm create'. +# This script sets the parameters used when a domain is created using 'xm create'. 
+# You use a separate script for each domain you want to create, or +# you can set the parameters for the domain on the xm command line. +#============================================================================ + +#---------------------------------------------------------------------------- +# Kernel image file. +kernel = "/boot/vmlinuz-rhel3-static" + +# Optional ramdisk. +#ramdisk = "/boot/initrd.gz" + +# The domain build function. Default is 'linux'. +builder='vmx' +#builder='linux' +#builder='netbsd' + +# Initial memory allocation (in megabytes) for the new domain. +memory = 128 + +# A name for your domain. All domains must have different names. +name = "ExampleVMXDomain" + +# Which CPU to start domain on? +#cpu = -1 # leave to Xen to pick + +#---------------------------------------------------------------------------- +# Define network interfaces. + +# Number of network interfaces. Default is 1. +#nics=1 +nics=0 + +# Optionally define mac and/or bridge for the network interfaces. +# Random MACs are assigned if not given. +#vif = [ 'mac=aa:00:00:00:00:11, bridge=xen-br0' ] + +#---------------------------------------------------------------------------- +# Define the disk devices you want the domain to have access to, and +# what you want them accessible as. +# Each disk entry is of the form phy:UNAME,DEV,MODE +# where UNAME is the device, DEV is the device name the domain will see, +# and MODE is r for read-only, w for read-write. + +#disk = [ 'phy:hda1,hda1,r' ] + +#---------------------------------------------------------------------------- +# Set the kernel command line for the new domain. +# You only need to define the IP parameters and hostname if the domain's +# IP config doesn't, e.g. in ifcfg-eth0 or via DHCP. +# You can use 'extra' to set the runlevel and custom environment +# variables used by custom rc scripts (e.g. VMID=, usr= ). + +# Set if you want dhcp to allocate the IP address. +#dhcp="dhcp" +# Set netmask. 
+#netmask= +# Set default gateway. +#gateway= +# Set the hostname. +#hostname= "vm%d" % vmid + +# Set root device. +#root = "/dev/ram0" +root = "/dev/hda1 ro" + +# Root device for nfs. +#root = "/dev/nfs" +# The nfs server. +#nfs_server = '169.254.1.0' +# Root directory on the nfs server. +#nfs_root = '/full/path/to/root/directory' + +# Sets runlevel 1 (single-user mode). +extra = "1" + +#---------------------------------------------------------------------------- +# Set according to whether you want the domain restarted when it exits. +# The default is 'onreboot', which restarts the domain when it shuts down +# with exit code reboot. +# Other values are 'always' and 'never'. + +#restart = 'onreboot' + +#============================================================================ + +# VMX-specific settings: guest memory map, device model program, and device model configuration file. +memmap = '/etc/xen/mem-map.sxp' +device_model = '/usr/sbin/device-model' +device_config = '/etc/xen/bochsrc' diff --git a/tools/libxc/linux_boot_params.h b/tools/libxc/linux_boot_params.h new file mode 100644 index 0000000000..749a41f630 --- /dev/null +++ b/tools/libxc/linux_boot_params.h @@ -0,0 +1,163 @@ +#ifndef __LINUX_BOOT_PARAMS_H__ +#define __LINUX_BOOT_PARAMS_H__ + +#include + +#define E820MAX 32 + +struct mem_map { + int nr_map; + struct entry { + unsigned long long addr; /* start of memory segment */ + unsigned long long size; /* size of memory segment */ + unsigned long type; /* type of memory segment */ +#define E820_RAM 1 +#define E820_RESERVED 2 +#define E820_ACPI 3 /* usable as RAM once ACPI tables have been read */ +#define E820_NVS 4 + + unsigned long caching_attr; /* used by hypervisor */ +#define MEMMAP_UC 0 +#define MEMMAP_WC 1 +#define MEMMAP_WT 4 +#define MEMMAP_WP 5 +#define MEMMAP_WB 6 + + }map[E820MAX]; +}; + +struct e820entry { + unsigned long long addr; /* start of memory segment */ + unsigned long long size; /* size of memory segment */ + unsigned long type; /* type of memory segment */ +}; + +struct e820map { + int nr_map; + struct e820entry map[E820MAX]; +}; + 
+struct drive_info_struct { __u8 dummy[32]; }; + +struct sys_desc_table { + __u16 length; + __u8 table[318]; +}; + +struct screen_info { + unsigned char orig_x; /* 0x00 */ + unsigned char orig_y; /* 0x01 */ + unsigned short dontuse1; /* 0x02 -- EXT_MEM_K sits here */ + unsigned short orig_video_page; /* 0x04 */ + unsigned char orig_video_mode; /* 0x06 */ + unsigned char orig_video_cols; /* 0x07 */ + unsigned short unused2; /* 0x08 */ + unsigned short orig_video_ega_bx; /* 0x0a */ + unsigned short unused3; /* 0x0c */ + unsigned char orig_video_lines; /* 0x0e */ + unsigned char orig_video_isVGA; /* 0x0f */ + unsigned short orig_video_points; /* 0x10 */ + + /* VESA graphic mode -- linear frame buffer */ + unsigned short lfb_width; /* 0x12 */ + unsigned short lfb_height; /* 0x14 */ + unsigned short lfb_depth; /* 0x16 */ + unsigned long lfb_base; /* 0x18 */ + unsigned long lfb_size; /* 0x1c */ + unsigned short dontuse2, dontuse3; /* 0x20 -- CL_MAGIC and CL_OFFSET here */ + unsigned short lfb_linelength; /* 0x24 */ + unsigned char red_size; /* 0x26 */ + unsigned char red_pos; /* 0x27 */ + unsigned char green_size; /* 0x28 */ + unsigned char green_pos; /* 0x29 */ + unsigned char blue_size; /* 0x2a */ + unsigned char blue_pos; /* 0x2b */ + unsigned char rsvd_size; /* 0x2c */ + unsigned char rsvd_pos; /* 0x2d */ + unsigned short vesapm_seg; /* 0x2e */ + unsigned short vesapm_off; /* 0x30 */ + unsigned short pages; /* 0x32 */ + /* 0x34 -- 0x3f reserved for future expansion */ +}; + +struct screen_info_overlap { + __u8 reserved1[2]; /* 0x00 */ + __u16 ext_mem_k; /* 0x02 */ + __u8 reserved2[0x20 - 0x04]; /* 0x04 */ + __u16 cl_magic; /* 0x20 */ +#define CL_MAGIC_VALUE 0xA33F + __u16 cl_offset; /* 0x22 */ + __u8 reserved3[0x40 - 0x24]; /* 0x24 */ +}; + + +struct apm_bios_info { + __u16 version; + __u16 cseg; + __u32 offset; + __u16 cseg_16; + __u16 dseg; + __u16 flags; + __u16 cseg_len; + __u16 cseg_16_len; + __u16 dseg_len; +}; + +struct linux_boot_params { + union { /* 0x00 */ 
+ struct screen_info info; + struct screen_info_overlap overlap; + } screen; + + struct apm_bios_info apm_bios_info; /* 0x40 */ + __u8 reserved4[0x80 - 0x54]; /* 0x54 */ + struct drive_info_struct drive_info; /* 0x80 */ + struct sys_desc_table sys_desc_table; /* 0xa0 */ + __u32 alt_mem_k; /* 0x1e0 */ + __u8 reserved5[4]; /* 0x1e4 */ + __u8 e820_map_nr; /* 0x1e8 */ + __u8 reserved6[8]; /* 0x1e9 */ + __u8 setup_sects; /* 0x1f1 */ + __u16 mount_root_rdonly; /* 0x1f2 */ + __u16 syssize; /* 0x1f4 */ + __u16 swapdev; /* 0x1f6 */ + __u16 ramdisk_flags; /* 0x1f8 */ +#define RAMDISK_IMAGE_START_MASK 0x07FF +#define RAMDISK_PROMPT_FLAG 0x8000 +#define RAMDISK_LOAD_FLAG 0x4000 + __u16 vid_mode; /* 0x1fa */ + __u16 root_dev; /* 0x1fc */ + __u8 reserved9[1]; /* 0x1fe */ + __u8 aux_device_info; /* 0x1ff */ + /* 2.00+ */ + __u8 reserved10[2]; /* 0x200 */ + __u8 header_magic[4]; /* 0x202 */ + __u16 protocol_version; /* 0x206 */ + __u8 reserved11[8]; /* 0x208 */ + __u8 loader_type; /* 0x210 */ +#define LOADER_TYPE_LOADLIN 1 +#define LOADER_TYPE_BOOTSECT_LOADER 2 +#define LOADER_TYPE_SYSLINUX 3 +#define LOADER_TYPE_ETHERBOOT 4 +#define LOADER_TYPE_UNKNOWN 0xFF + __u8 loader_flags; /* 0x211 */ + __u8 reserved12[2]; /* 0x212 */ + __u32 code32_start; /* 0x214 */ + __u32 initrd_start; /* 0x218 */ + __u32 initrd_size; /* 0x21c */ + __u8 reserved13[4]; /* 0x220 */ + /* 2.01+ */ + __u16 heap_end_ptr; /* 0x224 */ + __u8 reserved14[2]; /* 0x226 */ + /* 2.02+ */ + __u32 cmd_line_ptr; /* 0x228 */ + /* 2.03+ */ + __u32 ramdisk_max; /* 0x22c */ + __u8 reserved15[0x2d0 - 0x230]; /* 0x230 */ + struct e820entry e820_map[E820MAX]; /* 0x2d0 */ + __u64 shared_info; /* 0x550 */ + __u8 padding[0x800 - 0x558]; /* 0x558 */ + __u8 cmd_line[0x800]; /* 0x800 */ +} __attribute__((packed)); + +#endif /* __LINUX_BOOT_PARAMS_H__ */ diff --git a/tools/libxc/xc.h b/tools/libxc/xc.h index 72dfd3c4e6..8114faafb2 100644 --- a/tools/libxc/xc.h +++ b/tools/libxc/xc.h @@ -10,6 +10,8 @@ #define __XC_H__ #include 
+#include "linux_boot_params.h" + typedef uint8_t u8; typedef uint16_t u16; typedef uint32_t u32; @@ -105,6 +107,15 @@ xc_plan9_build (int xc_handle, unsigned int control_evtchn, unsigned long flags); +int xc_vmx_build(int xc_handle, + u32 domid, + const char *image_name, + struct mem_map *memmap, + const char *ramdisk_name, + const char *cmdline, + unsigned int control_evtchn, + unsigned long flags); + int xc_bvtsched_global_set(int xc_handle, unsigned long ctx_allow); @@ -208,4 +219,7 @@ void *xc_map_foreign_range(int xc_handle, u32 dom, void *xc_map_foreign_batch(int xc_handle, u32 dom, int prot, unsigned long *arr, int num ); +int xc_get_pfn_list(int xc_handle, u32 domid, unsigned long *pfn_buf, + unsigned long max_pfns); + #endif /* __XC_H__ */ diff --git a/tools/libxc/xc_vmx_build.c b/tools/libxc/xc_vmx_build.c new file mode 100644 index 0000000000..13541701b0 --- /dev/null +++ b/tools/libxc/xc_vmx_build.c @@ -0,0 +1,865 @@ +/****************************************************************************** + * xc_vmx_build.c + */ + +#include "xc_private.h" +#define ELFSIZE 32 +#include "xc_elf.h" +#include +#include +#include "linux_boot_params.h" + +#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED) +#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER) + +#define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK) +#define round_pgdown(_p) ((_p)&PAGE_MASK) + +#define LINUX_BOOT_PARAMS_ADDR 0x00090000 +#define LINUX_KERNEL_ENTR_ADDR 0x00100000 +#define LINUX_PAGE_OFFSET 0xC0000000 + +struct domain_setup_info +{ + unsigned long v_start; + unsigned long v_end; + unsigned long v_kernstart; + unsigned long v_kernend; + unsigned long v_kernentry; + + unsigned int use_writable_pagetables; + unsigned int load_bsd_symtab; + + unsigned long symtab_addr; + unsigned long symtab_len; +}; + +static int +parseelfimage( + char *elfbase, unsigned long elfsize, struct domain_setup_info *dsi); +static int +loadelfimage( + char *elfbase, int xch, u32 
dom, unsigned long *parray, + unsigned long vstart); +static int +loadelfsymtab( + char *elfbase, int xch, u32 dom, unsigned long *parray, + struct domain_setup_info *dsi); + +static long get_tot_pages(int xc_handle, u32 domid) +{ + dom0_op_t op; + op.cmd = DOM0_GETDOMAININFO; + op.u.getdomaininfo.domain = (domid_t)domid; + op.u.getdomaininfo.ctxt = NULL; + return (do_dom0_op(xc_handle, &op) < 0) ? + -1 : op.u.getdomaininfo.tot_pages; +} + +int xc_get_pfn_list(int xc_handle, + u32 domid, + unsigned long *pfn_buf, + unsigned long max_pfns) +{ + dom0_op_t op; + int ret; + op.cmd = DOM0_GETMEMLIST; + op.u.getmemlist.domain = (domid_t)domid; + op.u.getmemlist.max_pfns = max_pfns; + op.u.getmemlist.buffer = pfn_buf; + + + if ( mlock(pfn_buf, max_pfns * sizeof(unsigned long)) != 0 ) + return -1; + + ret = do_dom0_op(xc_handle, &op); + + (void)munlock(pfn_buf, max_pfns * sizeof(unsigned long)); + + return (ret < 0) ? -1 : op.u.getmemlist.num_pfns; +} + +static int copy_to_domain_page(int xc_handle, + u32 domid, + unsigned long dst_pfn, + void *src_page) +{ + void *vaddr = xc_map_foreign_range( + xc_handle, domid, PAGE_SIZE, PROT_WRITE, dst_pfn); + if ( vaddr == NULL ) + return -1; + memcpy(vaddr, src_page, PAGE_SIZE); + munmap(vaddr, PAGE_SIZE); + return 0; +} + +static int setup_guestos(int xc_handle, + u32 dom, + char *image, unsigned long image_size, + gzFile initrd_gfd, unsigned long initrd_len, + unsigned long nr_pages, + full_execution_context_t *ctxt, + const char *cmdline, + unsigned long shared_info_frame, + unsigned int control_evtchn, + unsigned long flags, + struct mem_map * mem_mapp) +{ + l1_pgentry_t *vl1tab=NULL, *vl1e=NULL; + l2_pgentry_t *vl2tab=NULL, *vl2e=NULL; + unsigned long *page_array = NULL; + unsigned long l2tab; + unsigned long l1tab; + unsigned long count, i; + shared_info_t *shared_info; + struct linux_boot_params * boot_paramsp; + __u16 * boot_gdtp; + mmu_t *mmu = NULL; + int rc; + + unsigned long nr_pt_pages; + unsigned long ppt_alloc; + + 
struct domain_setup_info dsi; + unsigned long vinitrd_start; + unsigned long vinitrd_end; + unsigned long vboot_params_start; + unsigned long vboot_params_end; + unsigned long vboot_gdt_start; + unsigned long vboot_gdt_end; + unsigned long vpt_start; + unsigned long vpt_end; + unsigned long v_end; + + memset(&dsi, 0, sizeof(struct domain_setup_info)); + + rc = parseelfimage(image, image_size, &dsi); + if ( rc != 0 ) + goto error_out; + + if (dsi.use_writable_pagetables) + xc_domain_setvmassist(xc_handle, dom, VMASST_CMD_enable, + VMASST_TYPE_writable_pagetables); + + if (dsi.load_bsd_symtab) + loadelfsymtab(image, xc_handle, dom, NULL, &dsi); + + if ( (dsi.v_start & (PAGE_SIZE-1)) != 0 ) + { + PERROR("Guest OS must load to a page boundary.\n"); + goto error_out; + } + + /* + * Why do we need this? The number of page-table frames depends on the + * size of the bootstrap address space. But the size of the address space + * depends on the number of page-table frames (since each one is mapped + * read-only). We have a pair of simultaneous equations in two unknowns, + * which we solve by exhaustive search. + */ + nr_pt_pages = 1 + (nr_pages >> (PAGE_SHIFT - 2)); + vboot_params_start = LINUX_BOOT_PARAMS_ADDR; + vboot_params_end = vboot_params_start + PAGE_SIZE; + vboot_gdt_start = vboot_params_end; + vboot_gdt_end = vboot_gdt_start + PAGE_SIZE; + v_end = nr_pages << PAGE_SHIFT; + vpt_end = v_end - (16 << PAGE_SHIFT); /* leaving the top 64k untouched */ + vpt_start = vpt_end - (nr_pt_pages << PAGE_SHIFT); + vinitrd_end = vpt_start; + vinitrd_start = vinitrd_end - initrd_len; + vinitrd_start = vinitrd_start & (~(PAGE_SIZE - 1)); + + if(initrd_len == 0) + vinitrd_start = vinitrd_end = 0; + + printf("VIRTUAL MEMORY ARRANGEMENT:\n" + " Boot_params: %08lx->%08lx\n" + " boot_gdt: %08lx->%08lx\n" + " Loaded kernel: %08lx->%08lx\n" + " Init. 
ramdisk: %08lx->%08lx\n" + " Page tables: %08lx->%08lx\n" + " TOTAL: %08lx->%08lx\n", + vboot_params_start, vboot_params_end, + vboot_gdt_start, vboot_gdt_end, + dsi.v_kernstart, dsi.v_kernend, + vinitrd_start, vinitrd_end, + vpt_start, vpt_end, + dsi.v_start, v_end); + printf(" ENTRY ADDRESS: %08lx\n", dsi.v_kernentry); + printf(" INITRD LENGTH: %08lx\n", initrd_len); + + if ( (v_end - dsi.v_start) > (nr_pages * PAGE_SIZE) ) + { + printf("Initial guest OS requires too much space\n" + "(%luMB is greater than %luMB limit)\n", + (v_end-dsi.v_start)>>20, (nr_pages<>20); + goto error_out; + } + + if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL ) + { + PERROR("Could not allocate memory"); + goto error_out; + } + + if ( xc_get_pfn_list(xc_handle, dom, page_array, nr_pages) != nr_pages ) + { + PERROR("Could not get the page frame list"); + goto error_out; + } + + loadelfimage(image, xc_handle, dom, page_array, dsi.v_start); + + if (dsi.load_bsd_symtab) + loadelfsymtab(image, xc_handle, dom, page_array, &dsi); + + /* Load the initial ramdisk image. */ + if ( initrd_len != 0 ) + { + for ( i = (vinitrd_start - dsi.v_start); + i < (vinitrd_end - dsi.v_start); i += PAGE_SIZE ) + { + char page[PAGE_SIZE]; + if ( gzread(initrd_gfd, page, PAGE_SIZE) == -1 ) + { + PERROR("Error reading initrd image, could not"); + goto error_out; + } + copy_to_domain_page(xc_handle, dom, + page_array[i>>PAGE_SHIFT], page); + } + } + + if ( (mmu = init_mmu_updates(xc_handle, dom)) == NULL ) + goto error_out; + + /* First allocate page for page dir. */ + ppt_alloc = (vpt_start - dsi.v_start) >> PAGE_SHIFT; + l2tab = page_array[ppt_alloc++] << PAGE_SHIFT; + ctxt->pt_base = l2tab; + + /* Initialise the page tables. 
*/ + if ( (vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, + PROT_READ|PROT_WRITE, + l2tab >> PAGE_SHIFT)) == NULL ) + goto error_out; + memset(vl2tab, 0, PAGE_SIZE); + vl2e = &vl2tab[l2_table_offset(dsi.v_start)]; + for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ ) + { + if ( ((unsigned long)vl1e & (PAGE_SIZE-1)) == 0 ) + { + l1tab = page_array[ppt_alloc++] << PAGE_SHIFT; + if ( vl1tab != NULL ) + munmap(vl1tab, PAGE_SIZE); + if ( (vl1tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, + PROT_READ|PROT_WRITE, + l1tab >> PAGE_SHIFT)) == NULL ) + { + munmap(vl2tab, PAGE_SIZE); + goto error_out; + } + memset(vl1tab, 0, PAGE_SIZE); + vl1e = &vl1tab[l1_table_offset(dsi.v_start + (count<= ((vpt_start-dsi.v_start)>>PAGE_SHIFT)) && + (count < ((vpt_end -dsi.v_start)>>PAGE_SHIFT)) ) + *vl1e &= ~_PAGE_RW; + vl1e++; + } + munmap(vl1tab, PAGE_SIZE); + munmap(vl2tab, PAGE_SIZE); + + /* + * Pin down l2tab addr as page dir page - causes hypervisor to provide + * correct protection for the page + */ + if ( add_mmu_update(xc_handle, mmu, + l2tab | MMU_EXTENDED_COMMAND, MMUEXT_PIN_L2_TABLE) ) + goto error_out; + + boot_paramsp = xc_map_foreign_range( + xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, + page_array[(vboot_params_start-dsi.v_start)>>PAGE_SHIFT]); + memset(boot_paramsp, 0, sizeof(*boot_paramsp)); + + strncpy(boot_paramsp->cmd_line, cmdline, 0x800); + boot_paramsp->cmd_line[0x800-1] = '\0'; + boot_paramsp->cmd_line_ptr = ((unsigned long) vboot_params_start) + offsetof(struct linux_boot_params, cmd_line); + + boot_paramsp->setup_sects = 0; + boot_paramsp->mount_root_rdonly = 1; + boot_paramsp->swapdev = 0x0; + boot_paramsp->ramdisk_flags = 0x0; + boot_paramsp->root_dev = 0x0; /* We must tell kernel root dev by kernel command line. */ + + /* we don't have a ps/2 mouse now. + * 0xAA means a aux mouse is there. + * See detect_auxiliary_port() in pc_keyb.c. 
+ */ + boot_paramsp->aux_device_info = 0x0; + + boot_paramsp->header_magic[0] = 0x48; /* "H" */ + boot_paramsp->header_magic[1] = 0x64; /* "d" */ + boot_paramsp->header_magic[2] = 0x72; /* "r" */ + boot_paramsp->header_magic[3] = 0x53; /* "S" */ + + boot_paramsp->protocol_version = 0x0203; /* 2.03 */ + boot_paramsp->loader_type = 0x71; /* GRUB */ + boot_paramsp->loader_flags = 0x1; /* loaded high */ + boot_paramsp->code32_start = LINUX_KERNEL_ENTR_ADDR; /* 1MB */ + boot_paramsp->initrd_start = vinitrd_start; + boot_paramsp->initrd_size = initrd_len; + + i = (nr_pages >> (PAGE_SHIFT - 10)) - (1 << 10) - 4; + boot_paramsp->alt_mem_k = i; /* alt_mem_k */ + boot_paramsp->screen.overlap.ext_mem_k = i & 0xFFFF; /* ext_mem_k */ + + /* + * Stuff SCREAN_INFO + */ + boot_paramsp->screen.info.orig_x = 0; + boot_paramsp->screen.info.orig_y = 0; + boot_paramsp->screen.info.orig_video_page = 8; + boot_paramsp->screen.info.orig_video_mode = 3; + boot_paramsp->screen.info.orig_video_cols = 80; + boot_paramsp->screen.info.orig_video_ega_bx = 0; + boot_paramsp->screen.info.orig_video_lines = 25; + boot_paramsp->screen.info.orig_video_isVGA = 1; + boot_paramsp->screen.info.orig_video_points = 0x0010; + + /* seems we may NOT stuff boot_paramsp->apm_bios_info */ + /* seems we may NOT stuff boot_paramsp->drive_info */ + /* seems we may NOT stuff boot_paramsp->sys_desc_table */ + *((unsigned short *) &boot_paramsp->drive_info.dummy[0]) = 800; + boot_paramsp->drive_info.dummy[2] = 4; + boot_paramsp->drive_info.dummy[14] = 32; + + boot_paramsp->e820_map_nr = mem_mapp->nr_map; + for (i=0; inr_map; i++) { + boot_paramsp->e820_map[i].addr = mem_mapp->map[i].addr; + boot_paramsp->e820_map[i].size = mem_mapp->map[i].size; + boot_paramsp->e820_map[i].type = mem_mapp->map[i].type; + } + munmap(boot_paramsp, PAGE_SIZE); + + boot_gdtp = xc_map_foreign_range( + xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, + page_array[(vboot_gdt_start-dsi.v_start)>>PAGE_SHIFT]); + memset(boot_gdtp, 0, 
PAGE_SIZE); + boot_gdtp[12*4 + 0] = boot_gdtp[13*4 + 0] = 0xffff; /* limit */ + boot_gdtp[12*4 + 1] = boot_gdtp[13*4 + 1] = 0x0000; /* base */ + boot_gdtp[12*4 + 2] = 0x9a00; boot_gdtp[13*4 + 2] = 0x9200; /* perms */ + boot_gdtp[12*4 + 3] = boot_gdtp[13*4 + 3] = 0x00cf; /* granu + top of limit */ + munmap(boot_gdtp, PAGE_SIZE); + + /* shared_info page starts its life empty. */ + shared_info = xc_map_foreign_range( + xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, shared_info_frame); + memset(shared_info, 0, sizeof(shared_info_t)); + /* Mask all upcalls... */ + for ( i = 0; i < MAX_VIRT_CPUS; i++ ) + shared_info->vcpu_data[i].evtchn_upcall_mask = 1; + munmap(shared_info, PAGE_SIZE); + + /* Send the page update requests down to the hypervisor. */ + if ( finish_mmu_updates(xc_handle, mmu) ) + goto error_out; + + free(mmu); + free(page_array); + + /* + * Initial register values: + */ + ctxt->cpu_ctxt.ds = 0x68; + ctxt->cpu_ctxt.es = 0x0; + ctxt->cpu_ctxt.fs = 0x0; + ctxt->cpu_ctxt.gs = 0x0; + ctxt->cpu_ctxt.ss = 0x68; + ctxt->cpu_ctxt.cs = 0x60; + ctxt->cpu_ctxt.eip = dsi.v_kernentry; + ctxt->cpu_ctxt.edx = vboot_gdt_start; + ctxt->cpu_ctxt.eax = 0x800; + ctxt->cpu_ctxt.esp = vboot_gdt_end; + ctxt->cpu_ctxt.ebx = 0; /* startup_32 expects this to be 0 to signal boot cpu */ + ctxt->cpu_ctxt.ecx = mem_mapp->nr_map; + ctxt->cpu_ctxt.esi = vboot_params_start; + ctxt->cpu_ctxt.edi = vboot_params_start + 0x2d0; + + ctxt->cpu_ctxt.eflags = (1<<2); + + return 0; + + error_out: + if ( mmu != NULL ) + free(mmu); + if ( page_array != NULL ) + free(page_array); + return -1; +} + +static unsigned long get_filesz(int fd) +{ + u16 sig; + u32 _sz = 0; + unsigned long sz; + + lseek(fd, 0, SEEK_SET); + read(fd, &sig, sizeof(sig)); + sz = lseek(fd, 0, SEEK_END); + if ( sig == 0x8b1f ) /* GZIP signature? 
*/ + { + lseek(fd, -4, SEEK_END); + read(fd, &_sz, 4); + sz = _sz; + } + lseek(fd, 0, SEEK_SET); + + return sz; +} + +static char *read_kernel_image(const char *filename, unsigned long *size) +{ + int kernel_fd = -1; + gzFile kernel_gfd = NULL; + char *image = NULL; + unsigned int bytes; + + if ( (kernel_fd = open(filename, O_RDONLY)) < 0 ) + { + PERROR("Could not open kernel image"); + goto out; + } + + *size = get_filesz(kernel_fd); + + if ( (kernel_gfd = gzdopen(kernel_fd, "rb")) == NULL ) + { + PERROR("Could not allocate decompression state for state file"); + goto out; + } + + if ( (image = malloc(*size)) == NULL ) + { + PERROR("Could not allocate memory for kernel image"); + goto out; + } + + if ( (bytes = gzread(kernel_gfd, image, *size)) != *size ) + { + PERROR("Error reading kernel image, could not" + " read the whole image (%d != %ld).", bytes, *size); + free(image); + image = NULL; + } + + out: + if ( kernel_gfd != NULL ) + gzclose(kernel_gfd); + else if ( kernel_fd >= 0 ) + close(kernel_fd); + return image; +} + +#define VMX_FEATURE_FLAG 0x20 + +int vmx_identify(void) +{ + int eax, ecx; + + __asm__ __volatile__ ("cpuid" + : "=a" (eax), "=c" (ecx) + : "0" (1) + : "bx", "dx"); + if (!(ecx & VMX_FEATURE_FLAG)) { + return -1; + } + return 0; +} + +int xc_vmx_build(int xc_handle, + u32 domid, + const char *image_name, + struct mem_map *mem_mapp, + const char *ramdisk_name, + const char *cmdline, + unsigned int control_evtchn, + unsigned long flags) +{ + dom0_op_t launch_op, op; + int initrd_fd = -1; + gzFile initrd_gfd = NULL; + int rc, i; + full_execution_context_t st_ctxt, *ctxt = &st_ctxt; + unsigned long nr_pages; + char *image = NULL; + unsigned long image_size, initrd_size=0; + + if ( vmx_identify() < 0 ) + { + PERROR("CPU doesn't support VMX Extensions"); + goto error_out; + } + + if ( (nr_pages = get_tot_pages(xc_handle, domid)) < 0 ) + { + PERROR("Could not find total pages for domain"); + goto error_out; + } + + if ( (image = 
read_kernel_image(image_name, &image_size)) == NULL ) + goto error_out; + + if ( (ramdisk_name != NULL) && (strlen(ramdisk_name) != 0) ) + { + if ( (initrd_fd = open(ramdisk_name, O_RDONLY)) < 0 ) + { + PERROR("Could not open the initial ramdisk image"); + goto error_out; + } + + initrd_size = get_filesz(initrd_fd); + + if ( (initrd_gfd = gzdopen(initrd_fd, "rb")) == NULL ) + { + PERROR("Could not allocate decompression state for initrd"); + goto error_out; + } + } + + if ( mlock(&st_ctxt, sizeof(st_ctxt) ) ) + { + PERROR("Unable to mlock ctxt"); + return 1; + } + + op.cmd = DOM0_GETDOMAININFO; + op.u.getdomaininfo.domain = (domid_t)domid; + op.u.getdomaininfo.ctxt = ctxt; + if ( (do_dom0_op(xc_handle, &op) < 0) || + ((u16)op.u.getdomaininfo.domain != domid) ) + { + PERROR("Could not get info on domain"); + goto error_out; + } + if ( !(op.u.getdomaininfo.flags & DOMFLAGS_PAUSED) || + (ctxt->pt_base != 0) ) + { + ERROR("Domain is already constructed"); + goto error_out; + } + + if ( setup_guestos(xc_handle, domid, image, image_size, + initrd_gfd, initrd_size, nr_pages, + ctxt, cmdline, + op.u.getdomaininfo.shared_info_frame, + control_evtchn, flags, mem_mapp) < 0 ) + { + ERROR("Error constructing guest OS"); + goto error_out; + } + + if ( initrd_fd >= 0 ) + close(initrd_fd); + if ( initrd_gfd ) + gzclose(initrd_gfd); + if ( image != NULL ) + free(image); + + ctxt->flags = ECF_VMX_GUEST; + /* FPU is set up to default initial state. */ + memset(ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt)); + + /* Virtual IDT is empty at start-of-day. */ + for ( i = 0; i < 256; i++ ) + { + ctxt->trap_ctxt[i].vector = i; + ctxt->trap_ctxt[i].cs = FLAT_GUESTOS_CS; + } + ctxt->fast_trap_idx = 0; + + /* No LDT. */ + ctxt->ldt_ents = 0; + + /* Use the default Xen-provided GDT. */ + ctxt->gdt_ents = 0; + + /* Ring 1 stack is the initial stack. */ +/* + ctxt->guestos_ss = FLAT_GUESTOS_DS; + ctxt->guestos_esp = vstartinfo_start; +*/ + /* No debugging. 
*/ + memset(ctxt->debugreg, 0, sizeof(ctxt->debugreg)); + + /* No callback handlers. */ + ctxt->event_callback_cs = FLAT_GUESTOS_CS; + ctxt->event_callback_eip = 0; + ctxt->failsafe_callback_cs = FLAT_GUESTOS_CS; + ctxt->failsafe_callback_eip = 0; + + memset( &launch_op, 0, sizeof(launch_op) ); + + launch_op.u.builddomain.domain = (domid_t)domid; + launch_op.u.builddomain.ctxt = ctxt; + + launch_op.cmd = DOM0_BUILDDOMAIN; + rc = do_dom0_op(xc_handle, &launch_op); + return rc; + + error_out: + if ( initrd_gfd != NULL ) + gzclose(initrd_gfd); + else if ( initrd_fd >= 0 ) + close(initrd_fd); + if ( image != NULL ) + free(image); + + return -1; +} + +static inline int is_loadable_phdr(Elf_Phdr *phdr) +{ + return ((phdr->p_type == PT_LOAD) && + ((phdr->p_flags & (PF_W|PF_X)) != 0)); +} + +static int parseelfimage(char *elfbase, + unsigned long elfsize, + struct domain_setup_info *dsi) +{ + Elf_Ehdr *ehdr = (Elf_Ehdr *)elfbase; + Elf_Phdr *phdr; + Elf_Shdr *shdr; + unsigned long kernstart = ~0UL, kernend=0UL; + char *shstrtab; + int h; + + if ( !IS_ELF(*ehdr) ) + { + ERROR("Kernel image does not have an ELF header."); + return -EINVAL; + } + + if ( (ehdr->e_phoff + (ehdr->e_phnum * ehdr->e_phentsize)) > elfsize ) + { + ERROR("ELF program headers extend beyond end of image."); + return -EINVAL; + } + + if ( (ehdr->e_shoff + (ehdr->e_shnum * ehdr->e_shentsize)) > elfsize ) + { + ERROR("ELF section headers extend beyond end of image."); + return -EINVAL; + } + + /* Find the section-header strings table. 
*/ + if ( ehdr->e_shstrndx == SHN_UNDEF ) + { + ERROR("ELF image has no section-header strings table (shstrtab)."); + return -EINVAL; + } + shdr = (Elf_Shdr *)(elfbase + ehdr->e_shoff + + (ehdr->e_shstrndx*ehdr->e_shentsize)); + shstrtab = elfbase + shdr->sh_offset; + + for ( h = 0; h < ehdr->e_phnum; h++ ) + { + phdr = (Elf_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize)); + if ( !is_loadable_phdr(phdr) ) + continue; + if ( phdr->p_vaddr < kernstart ) + kernstart = phdr->p_vaddr; + if ( (phdr->p_vaddr + phdr->p_memsz) > kernend ) + kernend = phdr->p_vaddr + phdr->p_memsz; + } + + if ( (kernstart > kernend) || + (ehdr->e_entry < kernstart) || + (ehdr->e_entry > kernend) ) + { + ERROR("Malformed ELF image."); + return -EINVAL; + } + + dsi->v_start = 0x00000000; + dsi->use_writable_pagetables = 0; + dsi->load_bsd_symtab = 0; + + dsi->v_kernstart = kernstart - LINUX_PAGE_OFFSET; + dsi->v_kernend = kernend - LINUX_PAGE_OFFSET; + dsi->v_kernentry = LINUX_KERNEL_ENTR_ADDR; + + dsi->v_end = dsi->v_kernend; + + return 0; +} + +static int +loadelfimage( + char *elfbase, int xch, u32 dom, unsigned long *parray, + unsigned long vstart) +{ + Elf_Ehdr *ehdr = (Elf_Ehdr *)elfbase; + Elf_Phdr *phdr; + int h; + + char *va; + unsigned long pa, done, chunksz; + + for ( h = 0; h < ehdr->e_phnum; h++ ) + { + phdr = (Elf_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize)); + if ( !is_loadable_phdr(phdr) ) + continue; + + for ( done = 0; done < phdr->p_filesz; done += chunksz ) + { + pa = (phdr->p_vaddr + done) - vstart - LINUX_PAGE_OFFSET; + va = xc_map_foreign_range( + xch, dom, PAGE_SIZE, PROT_WRITE, parray[pa>>PAGE_SHIFT]); + chunksz = phdr->p_filesz - done; + if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) ) + chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1)); + memcpy(va + (pa & (PAGE_SIZE-1)), + elfbase + phdr->p_offset + done, chunksz); + munmap(va, PAGE_SIZE); + } + + for ( ; done < phdr->p_memsz; done += chunksz ) + { + pa = (phdr->p_vaddr + done) - vstart - 
LINUX_PAGE_OFFSET; + va = xc_map_foreign_range( + xch, dom, PAGE_SIZE, PROT_WRITE, parray[pa>>PAGE_SHIFT]); + chunksz = phdr->p_memsz - done; + if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) ) + chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1)); + memset(va + (pa & (PAGE_SIZE-1)), 0, chunksz); + munmap(va, PAGE_SIZE); + } + } + + return 0; +} + +static void +map_memcpy( + unsigned long dst, char *src, unsigned long size, + int xch, u32 dom, unsigned long *parray, unsigned long vstart) +{ + char *va; + unsigned long chunksz, done, pa; + + for ( done = 0; done < size; done += chunksz ) + { + pa = dst + done - vstart; + va = xc_map_foreign_range( + xch, dom, PAGE_SIZE, PROT_WRITE, parray[pa>>PAGE_SHIFT]); + chunksz = size - done; + if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) ) + chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1)); + memcpy(va + (pa & (PAGE_SIZE-1)), src + done, chunksz); + munmap(va, PAGE_SIZE); + } +} + +#define ELFROUND (ELFSIZE / 8) + +static int +loadelfsymtab( + char *elfbase, int xch, u32 dom, unsigned long *parray, + struct domain_setup_info *dsi) +{ + Elf_Ehdr *ehdr = (Elf_Ehdr *)elfbase, *sym_ehdr; + Elf_Shdr *shdr; + unsigned long maxva, symva; + char *p; + int h, i; + + p = malloc(sizeof(int) + sizeof(Elf_Ehdr) + + ehdr->e_shnum * sizeof(Elf_Shdr)); + if (p == NULL) + return 0; + + maxva = (dsi->v_kernend + ELFROUND - 1) & ~(ELFROUND - 1); + symva = maxva; + maxva += sizeof(int); + dsi->symtab_addr = maxva; + dsi->symtab_len = 0; + maxva += sizeof(Elf_Ehdr) + ehdr->e_shnum * sizeof(Elf_Shdr); + maxva = (maxva + ELFROUND - 1) & ~(ELFROUND - 1); + + shdr = (Elf_Shdr *)(p + sizeof(int) + sizeof(Elf_Ehdr)); + memcpy(shdr, elfbase + ehdr->e_shoff, ehdr->e_shnum * sizeof(Elf_Shdr)); + + for ( h = 0; h < ehdr->e_shnum; h++ ) + { + if ( shdr[h].sh_type == SHT_STRTAB ) + { + /* Look for a strtab @i linked to symtab @h. 
*/ + for ( i = 0; i < ehdr->e_shnum; i++ ) + if ( (shdr[i].sh_type == SHT_SYMTAB) && + (shdr[i].sh_link == h) ) + break; + /* Skip symtab @h if we found no corresponding strtab @i. */ + if ( i == ehdr->e_shnum ) + { + shdr[h].sh_offset = 0; + continue; + } + } + + if ( (shdr[h].sh_type == SHT_STRTAB) || + (shdr[h].sh_type == SHT_SYMTAB) ) + { + if ( parray != NULL ) + map_memcpy(maxva, elfbase + shdr[h].sh_offset, shdr[h].sh_size, + xch, dom, parray, dsi->v_start); + + /* Mangled to be based on ELF header location. */ + shdr[h].sh_offset = maxva - dsi->symtab_addr; + + dsi->symtab_len += shdr[h].sh_size; + maxva += shdr[h].sh_size; + maxva = (maxva + ELFROUND - 1) & ~(ELFROUND - 1); + } + + shdr[h].sh_name = 0; /* Name is NULL. */ + } + + if ( dsi->symtab_len == 0 ) + { + dsi->symtab_addr = 0; + goto out; + } + + if ( parray != NULL ) + { + *(int *)p = maxva - dsi->symtab_addr; + sym_ehdr = (Elf_Ehdr *)(p + sizeof(int)); + memcpy(sym_ehdr, ehdr, sizeof(Elf_Ehdr)); + sym_ehdr->e_phoff = 0; + sym_ehdr->e_shoff = sizeof(Elf_Ehdr); + sym_ehdr->e_phentsize = 0; + sym_ehdr->e_phnum = 0; + sym_ehdr->e_shstrndx = SHN_UNDEF; + + /* Copy total length, crafted ELF header and section header table */ + map_memcpy(symva, p, sizeof(int) + sizeof(Elf_Ehdr) + + ehdr->e_shnum * sizeof(Elf_Shdr), xch, dom, parray, + dsi->v_start); + } + + dsi->symtab_len = maxva - dsi->symtab_addr; + dsi->v_end = round_pgup(maxva); + + out: + if ( p != NULL ) + free(p); + + return 0; +} diff --git a/tools/python/xen/lowlevel/xc/xc.c b/tools/python/xen/lowlevel/xc/xc.c index d2b7da0eba..b4ad35b9c0 100644 --- a/tools/python/xen/lowlevel/xc/xc.c +++ b/tools/python/xen/lowlevel/xc/xc.c @@ -16,6 +16,7 @@ #include #include "xc_private.h" #include "gzip_stream.h" +#include "linux_boot_params.h" /* Needed for Python versions earlier than 2.3. 
*/ #ifndef PyMODINIT_FUNC @@ -393,6 +394,87 @@ static PyObject *pyxc_plan9_build(PyObject *self, return zero; } +static PyObject *pyxc_vmx_build(PyObject *self, + PyObject *args, + PyObject *kwds) +{ + XcObject *xc = (XcObject *)self; + + u32 dom; + char *image, *ramdisk = NULL, *cmdline = ""; + PyObject *memmap; + int control_evtchn, flags = 0; + int numItems, i; + struct mem_map mem_map; + + static char *kwd_list[] = { "dom", "control_evtchn", + "image", "memmap", + "ramdisk", "cmdline", "flags", + NULL }; + + if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iisO!|ssi", kwd_list, + &dom, &control_evtchn, + &image, &PyList_Type, &memmap, + &ramdisk, &cmdline, &flags) ) + return NULL; + + memset(&mem_map, 0, sizeof(mem_map)); + /* Parse memmap */ + + /* get the number of lines passed to us */ + numItems = PyList_Size(memmap) - 1; /* removing the line + containing "memmap" */ + printf ("numItems: %d\n", numItems); + mem_map.nr_map = numItems; + + + /* should raise an error here. */ + if (numItems < 0) return NULL; /* Not a list */ + + + /* iterate over items of the list, grabbing ranges and parsing them */ + for (i = 1; i <= numItems; i++) { // skip over "memmap" + PyObject *item, *f1, *f2, *f3, *f4; + int numFields; + unsigned long lf1, lf2, lf3, lf4; + char *sf1, *sf2; + + /* grab the string object from the next element of the list */ + item = PyList_GetItem(memmap, i); /* Can't fail */ + + /* get the number of lines passed to us */ + numFields = PyList_Size(item); + + if (numFields != 4) + return NULL; + + f1 = PyList_GetItem(item, 0); + f2 = PyList_GetItem(item, 1); + f3 = PyList_GetItem(item, 2); + f4 = PyList_GetItem(item, 3); + + /* Convert objects to strings/longs */ + sf1 = PyString_AsString(f1); + sf2 = PyString_AsString(f2); + lf3 = PyLong_AsLong(f3); + lf4 = PyLong_AsLong(f4); + sscanf(sf1, "%lx", &lf1); + sscanf(sf2, "%lx", &lf2); + + mem_map.map[i-1].addr = lf1; + mem_map.map[i-1].size = lf2 - lf1; + mem_map.map[i-1].type = lf3; + 
mem_map.map[i-1].caching_attr = lf4; + } + + if ( xc_vmx_build(xc->xc_handle, dom, image, &mem_map, + ramdisk, cmdline, control_evtchn, flags) != 0 ) + return PyErr_SetFromErrno(xc_error); + + Py_INCREF(zero); + return zero; +} + static PyObject *pyxc_bvtsched_global_set(PyObject *self, PyObject *args, PyObject *kwds) @@ -943,6 +1025,17 @@ static PyMethodDef pyxc_methods[] = { " cmdline [str, n/a]: Kernel parameters, if any.\n\n" "Returns: [int] 0 on success; -1 on error.\n" }, + { "vmx_build", + (PyCFunction)pyxc_vmx_build, + METH_VARARGS | METH_KEYWORDS, "\n" + "Build a new Linux guest OS.\n" + " dom [int]: Identifier of domain to build into.\n" + " image [str]: Name of kernel image file. May be gzipped.\n" + " memmap [str]: Memory map.\n\n" + " ramdisk [str, n/a]: Name of ramdisk file, if any.\n" + " cmdline [str, n/a]: Kernel parameters, if any.\n\n" + "Returns: [int] 0 on success; -1 on error.\n" }, + { "bvtsched_global_set", (PyCFunction)pyxc_bvtsched_global_set, METH_VARARGS | METH_KEYWORDS, "\n" diff --git a/tools/python/xen/util/memmap.py b/tools/python/xen/util/memmap.py new file mode 100644 index 0000000000..2899a87535 --- /dev/null +++ b/tools/python/xen/util/memmap.py @@ -0,0 +1,41 @@ +mem_caching_attr = { + 'UC' : 0, + 'WC' : 1, + 'WT' : 4, + 'WP' : 5, + 'WB' : 6, + }; + +e820_mem_type = { + 'AddressRangeMemory' : 1, + 'AddressRangeReserved' : 2, + 'AddressRangeACPI' : 3, + 'AddressRangeNVS' : 4, + 'AddressRangeIO' : 16, + 'AddressRangeShared' : 17, +}; + +MT_COL = 2 +MA_COL = 3 + +def strmap(row): + if (type(row) != type([])): + return row + row[MT_COL] = e820_mem_type[row[MT_COL]] + row[MA_COL] = mem_caching_attr[row[MA_COL]] + return row + +def memmap_parse(memmap): + return map(strmap, memmap) + +if __name__ == '__main__': + memmap = [ 'memmap', + [ '1', '2', 'AddressRangeMemory', 'UC'], + [ '1', '2', 'AddressRangeReserved', 'UC'], + [ '1', '2', 'AddressRangeACPI', 'WB'], + [ '1', '2', 'AddressRangeNVS', 'WB'], + [ '1', '2', 'AddressRangeIO', 
'WB'], + [ '1', '2', 'AddressRangeShared', 'WB']] + print memmap_parse(memmap); + + diff --git a/tools/python/xen/xend/XendDomainInfo.py b/tools/python/xen/xend/XendDomainInfo.py index 1a229a5599..c92bdf08bc 100644 --- a/tools/python/xen/xend/XendDomainInfo.py +++ b/tools/python/xen/xend/XendDomainInfo.py @@ -20,6 +20,7 @@ from twisted.internet import defer import xen.lowlevel.xc; xc = xen.lowlevel.xc.new() import xen.util.ip from xen.util.ip import _readline, _readlines +from xen.xend.server import channel import sxp @@ -319,6 +320,7 @@ class XendDomainInfo: self.restart_time = None self.console_port = None self.savedinfo = None + self.is_vmx = 0 def setdom(self, dom): """Set the domain id. @@ -720,7 +722,7 @@ class XendDomainInfo: log.debug('init_domain> Created domain=%d name=%s memory=%d', dom, name, memory) self.setdom(dom) - def build_domain(self, ostype, kernel, ramdisk, cmdline): + def build_domain(self, ostype, kernel, ramdisk, cmdline, memmap): """Build the domain boot image. """ if self.recreate or self.restore: return @@ -735,17 +737,26 @@ class XendDomainInfo: flags = 0 if self.netif_backend: flags |= SIF_NET_BE_DOMAIN if self.blkif_backend: flags |= SIF_BLK_BE_DOMAIN - err = buildfn(dom = dom, - image = kernel, - control_evtchn = self.console.getRemotePort(), - cmdline = cmdline, - ramdisk = ramdisk, - flags = flags) + if ostype == "vmx": + err = buildfn(dom = dom, + image = kernel, + control_evtchn = 0, + memmap = memmap, + cmdline = cmdline, + ramdisk = ramdisk, + flags = flags) + else: + err = buildfn(dom = dom, + image = kernel, + control_evtchn = self.console.getRemotePort(), + cmdline = cmdline, + ramdisk = ramdisk, + flags = flags) if err != 0: raise VmError('Building domain failed: type=%s dom=%d err=%d' % (ostype, dom, err)) - def create_domain(self, ostype, kernel, ramdisk, cmdline): + def create_domain(self, ostype, kernel, ramdisk, cmdline, memmap=''): """Create a domain. Builds the image but does not configure it. 
@param ostype: OS type @@ -760,7 +771,7 @@ class XendDomainInfo: else: self.console = xendConsole.console_create( self.dom, console_port=self.console_port) - self.build_domain(ostype, kernel, ramdisk, cmdline) + self.build_domain(ostype, kernel, ramdisk, cmdline, memmap) self.image = kernel self.ramdisk = ramdisk self.cmdline = cmdline @@ -804,6 +815,18 @@ class XendDomainInfo: index[dev_name] = dev_index + 1 deferred = defer.DeferredList(dlist, fireOnOneErrback=1) deferred.addErrback(dlist_err) + if self.is_vmx: + device_model = sxp.child_value(self.config, 'device_model') + device_config = sxp.child_value(self.config, 'device_config') + memory = sxp.child_value(self.config, "memory") + # Create an event channel + device_channel = channel.eventChannel(0, self.dom) + # Fork and exec device_model -f device_config + os.system(device_model + + " -f %s" % device_config + + " -d %d" % self.dom + + " -p %d" % device_channel['port1'] + + " -m %s &" % memory) return deferred def device_create(self, dev_config): @@ -1091,7 +1114,33 @@ def vm_image_plan9(vm, image): vm.create_domain("plan9", kernel, ramdisk, cmdline) return vm - +def vm_image_vmx(vm, image): + """Create a VM for the VMX environment. + + @param name: vm name + @param memory: vm memory + @param image: image config + @return: vm + """ + kernel = sxp.child_value(image, "kernel") + cmdline = "" + ip = sxp.child_value(image, "ip", "dhcp") + if ip: + cmdline += " ip=" + ip + root = sxp.child_value(image, "root") + if root: + cmdline += " root=" + root + args = sxp.child_value(image, "args") + if args: + cmdline += " " + args + ramdisk = sxp.child_value(image, "ramdisk", '') + memmap = sxp.child_value(vm.config, "memmap", '') + memmap = sxp.parse(open(memmap))[0] + from xen.util.memmap import memmap_parse + memmap = memmap_parse(memmap) + vm.create_domain("vmx", kernel, ramdisk, cmdline, memmap) + vm.is_vmx = 1 + return vm def vm_dev_vif(vm, val, index, change=0): """Create a virtual network interface (vif). 
@@ -1215,6 +1264,7 @@ def vm_field_maxmem(vm, config, val, index): # Register image handlers. add_image_handler('linux', vm_image_linux) add_image_handler('plan9', vm_image_plan9) +add_image_handler('vmx', vm_image_vmx) # Register device handlers. add_device_handler('vif', vm_dev_vif) diff --git a/tools/python/xen/xm/create.py b/tools/python/xen/xm/create.py index 9d977dd337..633888a643 100644 --- a/tools/python/xen/xm/create.py +++ b/tools/python/xen/xm/create.py @@ -210,6 +210,18 @@ gopts.var('nfs_root', val="PATH", fn=set_value, default=None, use="Set the path of the root NFS directory.") +gopts.var('memmap', val='FILE', + fn=set_value, default='', + use="Path to memap SXP file.") + +gopts.var('device_model', val='FILE', + fn=set_value, default='', + use="Path to device model program.") + +gopts.var('device_config', val='FILE', + fn=set_value, default='', + use="Path to device model configuration.") + def strip(pre, s): """Strip prefix 'pre' if present. """ @@ -309,6 +321,15 @@ def configure_vfr(config, vals): config_vfr.append(['vif', ['id', idx], ['ip', ip]]) config.append(config_vfr) +def configure_vmx(config_devs, vals): + """Create the config for VMX devices. + """ + memmap = vals.memmap + device_model = vals.device_model + device_config = vals.device_config + config_devs.append(['memmap', memmap]) + config_devs.append(['device_model', device_model]) + config_devs.append(['device_config', device_config]) def make_config(vals): """Create the domain configuration. @@ -337,6 +358,7 @@ def make_config(vals): configure_disks(config_devs, vals) configure_pci(config_devs, vals) configure_vifs(config_devs, vals) + configure_vmx(config_devs, vals) config += config_devs return config -- 2.30.2